

########

#' Category-level duty totals, CIF totals and implied duty rates
#'
#' Joins `cald` to `cifval` on `hs08` and `month`, restricts both to the
#' requested months, totals the 2024/2025 value columns per `hs08`, keeps only
#' codes that appear in `food_tariffs_hs08`, assigns each code a category and
#' aggregates per category.
#'
#' `cald`, `cifval` and `food_tariffs_hs08` are not arguments: they are free
#' variables looked up in the enclosing environment and must exist when this
#' function is called. After the join the data must contain the columns
#' `cald_2024`, `cald_2025`, `cifval_2024` and `cifval_2025`.
#'
#' @param months_to_keep Vector of month values to retain. It is compared with
#'   `month` after `month` has been converted to character.
#' @return A data frame with one row per category present in the data
#'   (`"banana"`, `"coffee"`, `"nuts"`) and the columns `category`,
#'   `total_cald24`, `total_cald25`, `total_cif24`, `total_cif25`, `rate24`
#'   (`total_cald24 / total_cif24`), `rate25` (`total_cald25 / total_cif25`)
#'   and `diff` (`rate25 - rate24`). Codes that match no category are dropped.
#'   The divisions are unguarded, so a zero CIF total yields a non-finite
#'   rate (`NaN` or `Inf`).
construct_datasets2 <- function(months_to_keep) {
  # Coerce `month` to character and drop unwanted months *before* joining.
  # `month` is a join key, so this yields the same rows as filtering after
  # the join while never matching rows that would be discarded anyway.
  keep_months <- function(df) {
    df %>%
      mutate(month = as.character(month)) %>%
      filter(month %in% months_to_keep)
  }

  # Cumulative totals per hs08 code over the selected months.
  # NOTE(review): the left join keeps only hs08/month pairs present in `cald`;
  # `cifval` rows without a `cald` counterpart never reach the totals --
  # confirm this is intended.
  hs08_totals <- keep_months(cald) %>%
    left_join(keep_months(cifval), by = c("hs08", "month")) %>%
    mutate(
      across(
        c(cald_2024, cald_2025, cifval_2024, cifval_2025),
        # Go through character so factor columns give labels, not level codes.
        ~ as.numeric(as.character(.x))
      )
    ) %>%
    group_by(hs08) %>%
    summarise(
      cald24 = sum(cald_2024, na.rm = TRUE),
      cald25 = sum(cald_2025, na.rm = TRUE),
      cif24  = sum(cifval_2024, na.rm = TRUE),
      cif25  = sum(cifval_2025, na.rm = TRUE),
      .groups = "drop"
    )

  # Keep only the HS08 codes listed in `food_tariffs_hs08`, then label them.
  categorised <- hs08_totals %>%
    semi_join(
      food_tariffs_hs08 %>% mutate(hs08 = as.character(hs08)),
      by = "hs08"
    ) %>%
    mutate(
      # No-op when hs08 is already character; kept for factor keys.
      hs08 = as.character(hs08),
      hs04 = substring(hs08, 1, 4),
      hs06 = substring(hs08, 1, 6),
      # Conditions are tried in order; the first match wins.
      category = case_when(
        hs08 == "20089915" ~ "banana",
        hs04 == "0901" ~ "coffee",
        hs04 == "0802" ~ "nuts",
        hs06 %in% c("080121", "080122", "080131", "080132") ~ "nuts",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(!is.na(category))

  # Aggregate to category level and derive the rates from the totals, i.e.
  # a ratio of sums rather than a mean of per-code ratios.
  categorised %>%
    group_by(category) %>%
    summarise(
      total_cald24 = sum(cald24, na.rm = TRUE),
      total_cald25 = sum(cald25, na.rm = TRUE),
      total_cif24  = sum(cif24,  na.rm = TRUE),
      total_cif25  = sum(cif25,  na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      rate24 = total_cald24 / total_cif24,
      rate25 = total_cald25 / total_cif25,
      diff   = rate25 - rate24
    )
}

# Build the summary for April through August (`month.name[4:8]`).
# NOTE(review): despite the name, `construct_datasets2()` returns one row per
# category, not per product.
product_level_data <- construct_datasets2(months_to_keep = month.name[4:8])
